The model consists of multiple external factors. These factors combined can result in either desirable or disadvantageous outcomes for the policymakers. Therefore it is of great importance to get more insight into the effects of these external factors on possible policies. That is why a scenario discovery analysis is performed. The method Patient Rule Induction Method, or PRIM, is used to categorize the scenarios and determine which ones best predicts desired outcomes given the inputs of the model. The data from the open exploration stage is used for this.
# Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from ema_workbench.analysis import prim
from ema_workbench import ema_logging
ema_logging.log_to_stderr(ema_logging.INFO)
# Open file with outcomes
outcomes = pd.read_csv('./open_exploration/outcomes_3.csv', index_col=None)
outcomes.head()
# Open file with experiments
experiments = pd.read_csv('./open_exploration/experiments_3.csv', index_col=None)
experiments.head()
# Create a new column that combines all total costs/total deaths from all locations
outcomes['Total Costs'] = outcomes['A.1 Total Costs'] + outcomes['A.2 Total Costs'] + outcomes['A.3 Total Costs'] + outcomes['A.4 Total Costs'] + outcomes['A.5 Total Costs']
outcomes['Total Deaths'] = outcomes['A.1_Expected Number of Deaths'] + outcomes['A.2_Expected Number of Deaths'] + outcomes['A.3_Expected Number of Deaths'] + outcomes['A.4_Expected Number of Deaths'] + outcomes['A.5_Expected Number of Deaths']
outcomes.head()
experiments.columns
# Looking for thresholds that will be used for PRIM
# Thresholds are based on wishes of our client: Rijkswaterstaat
plt.figure(figsize=(12, 10))
plt.suptitle('Percentage of values below the selected thresholds', fontsize=14)
# Subplot RfR Costs
plt.subplot(321)
outcomes['RfR Total Costs'].hist(edgecolor='black', alpha = 1)
# Dashed line symolising threshold
plt.vlines(1.25e9, 0, 10000, colors = "r", linestyles = "dashed")
# Calculating percentage under threshold
percentage_rfr = (np.sum(outcomes['RfR Total Costs']< 1.25e9) / len(outcomes['RfR Total Costs']))* 100
plt.title('Total RfR Costs: '+ str(percentage_rfr)+ '%')
# Subplot Expected Evacuation Costs
plt.subplot(322)
# Dashed line symolising threshold
outcomes['Expected Evacuation Costs'].hist(edgecolor='black', alpha = 1)
plt.vlines(50000, 0, 40000, colors = "r", linestyles = "dashed")
# Calculating percentage under threshold
percentage_eec = (np.sum(outcomes['Expected Evacuation Costs']<= 50000)/len(outcomes['Expected Evacuation Costs']))* 100
plt.title('Total Expected Evacuation Costs: '+ str(percentage_eec)+ '%')
# Subplot Total Deaths
plt.subplot(323)
outcomes['Total Deaths'].hist(edgecolor='black', alpha = 1)
# Dashed line symolising threshold
plt.vlines(0.005, 0, 40000, colors = "r", linestyles = "dashed")
# Calculating percentage under threshold
percentage_td = (np.sum(outcomes['Total Deaths']< 0.005)/len(outcomes['Total Deaths']))* 100
plt.title('Total Deaths: '+ str(percentage_td)+ '%')
# Subplot Total Costs
plt.subplot(325)
outcomes['Total Costs'].hist(edgecolor='black', alpha = 1)
# Dashed line symolising threshold
plt.vlines(0.75e9, 0, 30000, colors = "r", linestyles = "dashed")
# Calculating percentage under threshold
percentage_tc = (np.sum(outcomes['Total Costs']< 0.75e9)/len(outcomes['Total Costs']))* 100
plt.title('Total Costs: '+ str(percentage_tc)+ '%')
plt.show()
# Set thresholds as input for PRIM
# Thresholds are based on wishes of Rijkswaterstaat
x = experiments.iloc[:, 1:-4]
y_death = outcomes['Total Deaths'] < 0.005
y_totalcosts = outcomes['Total Costs'] < 0.75e9
y_rfrcosts = outcomes['RfR Total Costs'] < 1.25e9
y_evaccosts = outcomes['Expected Evacuation Costs'] < 50000
PRIM subspace partitioning will be used for the following outcomes: expected deaths, total costs, RfR costs and evacuation costs. The base case data will also be used with PRIM to best compare the results and effects of uncertainty.
Lastly, worst scenarios will also be determines using PRIM.
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_death, threshold=0.8, peel_alpha=0.1)
box_death1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_death1.show_tradeoff()
box_death1.inspect(style='graph')
plt.show
# Plot boxes
box_death1.show_pairs_scatter()
plt.show()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_totalcosts, threshold=0.8, peel_alpha=0.1)
box_totalcosts1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_totalcosts1.show_tradeoff()
box_totalcosts1.inspect(style='graph')
plt.show
# Plot boxes
box_totalcosts1.show_pairs_scatter()
plt.show()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_rfrcosts, threshold=0.8, peel_alpha=0.1)
box_rfr1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_rfr1.show_tradeoff()
box_rfr1.inspect(style='graph')
plt.show
# Plot boxes
box_rfr1.show_pairs_scatter()
plt.show()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg_evaccosts = prim.Prim(x, y_evaccosts, threshold=0.8, peel_alpha=0.1)
box_evaccosts1 = prim_alg_evaccosts.find_box()
# Plot trade off graph between density and coverage of boxes
box_evaccosts1.show_tradeoff()
box_evaccosts1.inspect(style='graph')
plt.show
# Plot boxes
box_evaccosts1.show_pairs_scatter()
plt.show()
# Set new thresholds that encompass the worst oucomes, again based on wishes of Rijkswaterstaat
x = experiments.iloc[:, 1:-4]
y_death_w = outcomes['Total Deaths'] > 0.005
y_totalcosts_w = outcomes['Total Costs'] > 0.75e9
y_rfrcosts_w = outcomes['RfR Total Costs'] > 1.25e9
y_evaccosts_w = outcomes['Expected Evacuation Costs'] >= 50000
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_death_w, threshold=0.8, peel_alpha=0.1)
box_death1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_death1.show_tradeoff()
box_death1.inspect(style='graph')
plt.show
# Plot boxes
box_death1.show_pairs_scatter()
plt.show()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_totalcosts_w, threshold=0.8, peel_alpha=0.1)
box_totalcosts1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_totalcosts1.show_tradeoff()
box_totalcosts1.inspect(style='graph')
plt.show
# Plot boxes
box_totalcosts1.show_pairs_scatter()
plt.show()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x, y_rfrcosts_w, threshold=0.8, peel_alpha=0.1)
box_rfr1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_rfr1.show_tradeoff()
box_rfr1.inspect(style='graph')
plt.show
# Plot boxes
box_rfr1.show_pairs_scatter()
plt.show()
y_evaccosts_w
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
# Only 60 cases of 0.1% are of interest. The othe cases do not fit within the treshold.
prim_alg_evaccosts = prim.Prim(x, y_evaccosts_w, threshold=0.8, peel_alpha=0.1)
box_evaccosts1 = prim_alg_evaccosts.find_box()
# Plotting boxes gives empty box
box_evaccosts1.show_tradeoff()
box_evaccosts1.inspect(style='graph')
plt.show
# Running the following code gives the errosr: 'ValueError: No variables found for grid columns.'
# This is due to the fact that ~99.9% of results are within the treshold of Rijkswaterstaat
#box_evaccosts1.show_pairs_scatter()
#plt.show()
# Open file with outcomes and experiments from the base case
outcomes_bc = pd.read_csv('./open_exploration/outcomes_basecase.csv', index_col=None)
experiments_bc = pd.read_csv('./open_exploration/experiments_basecase.csv', index_col=None)
# Create a new column that combines all total costs/total deaths from all locations
outcomes_bc['Total Costs'] = outcomes_bc['A.1 Total Costs'] + outcomes_bc['A.2 Total Costs'] + outcomes_bc['A.3 Total Costs'] + outcomes_bc['A.4 Total Costs'] + outcomes_bc['A.5 Total Costs']
outcomes_bc['Total Deaths'] = outcomes_bc['A.1_Expected Number of Deaths'] + outcomes_bc['A.2_Expected Number of Deaths'] + outcomes_bc['A.3_Expected Number of Deaths'] + outcomes_bc['A.4_Expected Number of Deaths'] + outcomes_bc['A.5_Expected Number of Deaths']
# Set thresholds as input for PRIM
# Thresholds are still based on wishes of Rijkswaterstaat
# Since there are no policies, evacuation costs and RfR costs are 0 in every scenario. These variables will be disregarded.
x_bc = experiments_bc.iloc[:, 1:-4]
y_death_bc = outcomes_bc['Total Deaths'] < 0.005
y_totalcosts_bc = outcomes_bc['Total Costs'] < 0.75e9
# Expected deaths will also be disregarded as apparantly 0 results note expected deaths under 0.005
y_death_bc.sum()
# Perform PRIM algorithm using a 0.8 threshold. This is the density threshold that a box has to meet.
prim_alg = prim.Prim(x_bc, y_totalcosts_bc, threshold=0.8, peel_alpha=0.1)
box_totalcosts1 = prim_alg.find_box()
# Plot trade off graph between density and coverage of boxes
box_totalcosts1.show_tradeoff()
box_totalcosts1.inspect(style='graph')
plt.show
# Plot boxes
box_totalcosts1.show_pairs_scatter()
plt.show()